import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, LayerNormalization, MultiHeadAttention, Dropout, Add
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
dataset = pd.read_csv('Electricity_Consumption.csv')
df = dataset.drop(['DATE'], axis=1)  # keep only the numeric columns
df

|   | Humidity | Temperature | Electricity |
|---|---|---|---|
| 0 | 92 | -1.1 | 914 |
| 1 | 92 | -1.5 | 887 |
| 2 | 91 | -1.5 | 865 |
| 3 | 88 | -1.1 | 852 |
| 4 | 87 | -1.3 | 852 |
| ... | ... | ... | ... |
| 8755 | 95 | -1.2 | 1180 |
| 8756 | 93 | 0.8 | 1133 |
| 8757 | 94 | 1.9 | 1073 |
| 8758 | 93 | 2.2 | 994 |
| 8759 | 95 | 2.6 | 919 |
8760 rows × 3 columns
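Since the DATE column is dropped before modeling, any time-aware inspection has to happen first. A minimal sketch, assuming DATE holds timestamps that pandas can parse (the exact format is not shown above):

# Hypothetical pre-processing step: keep DATE as a DatetimeIndex for inspection
df_ts = (
    pd.read_csv('Electricity_Consumption.csv', parse_dates=['DATE'])
      .set_index('DATE')
      .sort_index()
)
df_ts['Electricity'].resample('D').mean()  # e.g. daily averages before modeling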
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Load dataset
dataset = pd.read_csv('Electricity_Consumption.csv')
X = dataset[['Humidity', 'Temperature']]
y = dataset['Electricity']

# Split into training and validation sets
# (note: train_test_split shuffles rows; the sequence model below instead
# uses a chronological split, which is the safer choice for time series)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train).reshape(-1, 1)
y_test = np.array(y_test).reshape(-1, 1)

# Scale features and target to [0, 1]; fit only on the training split to avoid leakage
sc_X = MinMaxScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

sc_y = MinMaxScaler()
y_train = sc_y.fit_transform(y_train)
y_test = sc_y.transform(y_test)
# Define sequence length (time steps)
seq_length = 50  # number of past time steps used for each prediction

# Function to create sliding-window samples: each input is seq_length
# consecutive rows; the target is the Electricity value of the next row.
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i + seq_length])    # sequence of past time steps
        y.append(data[i + seq_length, -1])  # predict the next step's Electricity (last column)
    return np.array(X), np.array(y)

# Windows are cut from the scaled full series, reusing the fitted scalers so
# that sc_y can later invert the predictions
data = np.hstack([
    sc_X.transform(df[['Humidity', 'Temperature']].values),
    sc_y.transform(df[['Electricity']].values),
])

# Generate X (input) and y (output), then split chronologically (no shuffling)
X_seq, y_seq = create_sequences(data, seq_length)
split = int(0.8 * len(X_seq))
X_train, X_test = X_seq[:split], X_seq[split:]
y_train, y_test = y_seq[:split], y_seq[split:]

print("Training Data Shape:", X_train.shape, y_train.shape)
print("Validation Data Shape:", X_test.shape, y_test.shape)

Training Data Shape: (6968, 50, 3) (6968,)
Validation Data Shape: (1742, 50, 3) (1742,)
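To make the windowing concrete, here is a quick illustrative check on a tiny ramp series (one feature, ten steps):

# Illustrative sanity check of the sliding window on a toy series
toy = np.arange(10, dtype=float).reshape(-1, 1)  # 10 "time steps", 1 feature
tX, ty = create_sequences(toy, seq_length=3)
print(tX.shape, ty.shape)          # (7, 3, 1) (7,)
print(tX[0].ravel(), "->", ty[0])  # [0. 1. 2.] -> 3.0

Each window of three past values predicts the value that follows it; sliding the window one step at a time yields len(data) - seq_length samples, hence 8760 - 50 = 8710 windows over the full series before the split.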
def positional_encoding(seq_length, d_model):
    pos = np.arange(seq_length)[:, np.newaxis]
    i = np.arange(d_model)[np.newaxis, :]
    angles = pos / np.power(10000, (2 * (i // 2)) / d_model)
    angles[:, 0::2] = np.sin(angles[:, 0::2])  # apply sin to even indices
    angles[:, 1::2] = np.cos(angles[:, 1::2])  # apply cos to odd indices
    return tf.constant(angles, dtype=tf.float32)
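Self-attention is order-agnostic, so these sin/cos signatures at geometrically spaced frequencies are what lets the model tell time steps apart. A quick shape check (illustrative only):

# One (seq_length, d_model) table, broadcast-added to every sample in a batch
pe = positional_encoding(50, 3)
print(pe.shape)  # (50, 3)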
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0.1):
    # Multi-head self-attention with residual connection and layer norm
    x = MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(inputs, inputs)
    x = Dropout(dropout)(x)
    x = Add()([x, inputs])
    x = LayerNormalization(epsilon=1e-6)(x)

    # Position-wise feed-forward network, again with residual and layer norm
    x_ff = Dense(ff_dim, activation="relu")(x)
    x_ff = Dropout(dropout)(x_ff)
    x_ff = Dense(inputs.shape[-1])(x_ff)  # project back to the input width
    x = Add()([x, x_ff])
    x = LayerNormalization(epsilon=1e-6)(x)
    return x
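Because of the two residual additions, the block must return a tensor of exactly the shape it receives; a quick functional-API probe confirms this (illustrative only):

# The encoder block is shape-preserving: (batch, 50, 3) in, (batch, 50, 3) out
probe = Input(shape=(50, 3))
print(transformer_encoder(probe, head_size=64, num_heads=4, ff_dim=128).shape)  # (None, 50, 3)

This is also why the second Dense layer projects back to inputs.shape[-1] rather than staying at ff_dim.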
def build_transformer_model(input_shape, head_size=64, num_heads=4, ff_dim=128, num_layers=2, dropout=0.1):
    inputs = Input(shape=input_shape)

    # Add positional encoding so the model can distinguish time steps
    seq_length, d_model = input_shape
    pos_encoding = positional_encoding(seq_length, d_model)
    x = inputs + pos_encoding

    # Stack multiple Transformer encoder blocks
    for _ in range(num_layers):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    # Pool over time, then regress a single next-step value
    x = keras.layers.GlobalAveragePooling1D()(x)
    outputs = Dense(1)(x)
    return Model(inputs, outputs)
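The Dense(1) head makes this a one-step-ahead regressor. If a multi-step forecast were wanted instead, only the head would need to change; a hypothetical sketch (build_multistep_model and forecast_horizon are illustrative names, not used in the runs below):

# Hypothetical multi-step variant: emit forecast_horizon values at once.
# Targets would then need shape (n_samples, forecast_horizon).
def build_multistep_model(input_shape, forecast_horizon=10, **encoder_kwargs):
    base = build_transformer_model(input_shape, **encoder_kwargs)
    pooled = base.layers[-2].output            # features after global average pooling
    outputs = Dense(forecast_horizon)(pooled)  # one unit per future step
    return Model(base.input, outputs)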
# Build, compile, and summarize the one-step model
input_shape = (50, 3)  # (time steps, features)
model = build_transformer_model(input_shape)
model.compile(optimizer="adam", loss="mse", metrics=["mae"])
model.summary()

Model: "functional_6"
input_layer (None, 50, 3) → add positional encoding
  → 2 × [ multi_head_attention (603 params) → dropout → add →
          layer_normalization (6) → dense 128 relu (512) → dropout →
          dense 3 (387) → add → layer_normalization (6) ]
  → global_average_pooling1d (None, 3) → dense (None, 1) (4 params)
(full layer table elided; every encoder sub-layer preserves the (None, 50, 3) shape)
Total params: 9,098 (35.54 KB)
Trainable params: 3,032 (11.84 KB)
Non-trainable params: 0 (0.00 B)
Optimizer params: 6,066 (23.70 KB)
from keras_tuner import BayesianOptimization
def model_builder(hp):
    head_size = hp.Int("head_size", 32, 128, step=32)
    num_heads = hp.Int("num_heads", 2, 8, step=2)
    ff_dim = hp.Int("ff_dim", 64, 512, step=64)
    dropout = hp.Float("dropout", 0.1, 0.5, step=0.1)
    model = build_transformer_model(input_shape, head_size=head_size, num_heads=num_heads, ff_dim=ff_dim, dropout=dropout)
    model.compile(optimizer="adam", loss="mse")
    return model
tuner = BayesianOptimization(model_builder, objective="val_loss", max_trials=5, directory="tuner")
tuner.search(X_train, y_train, epochs=10, validation_data=(X_test, y_test))
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best Hyperparameters: {best_hps.values}")Trial 5 Complete [00h 02m 56s]
val_loss: 0.05065935477614403
Best val_loss So Far: 0.05065935477614403
Total elapsed time: 00h 16m 58s
Best Hyperparameters: {'head_size': 64, 'num_heads': 6, 'ff_dim': 128, 'dropout': 0.1}
best_model = tuner.get_best_models(num_models=1)[0]

D:\Anaconda\Lib\site-packages\keras\src\saving\saving_lib.py:757: UserWarning: Skipping variable loading for optimizer 'adam', because it has 2 variables whereas the saved optimizer has 70 variables.
  saveable.load_own_variables(weights_store.get(inner_path))

history = best_model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, batch_size=32)

Epoch 1/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 44s 91ms/step - loss: 0.0513 - val_loss: 0.0504
Epoch 2/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 18s 65ms/step - loss: 0.0506 - val_loss: 0.0506
Epoch 3/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 17s 62ms/step - loss: 0.0515 - val_loss: 0.0505
Epoch 4/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 18s 65ms/step - loss: 0.0508 - val_loss: 0.0509
Epoch 5/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 16s 57ms/step - loss: 0.0506 - val_loss: 0.0502
Epoch 6/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 15s 56ms/step - loss: 0.0505 - val_loss: 0.0503
Epoch 7/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 15s 54ms/step - loss: 0.0504 - val_loss: 0.0508
Epoch 8/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 16s 59ms/step - loss: 0.0505 - val_loss: 0.0504
Epoch 9/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 17s 61ms/step - loss: 0.0509 - val_loss: 0.0502
Epoch 10/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 17s 62ms/step - loss: 0.0500 - val_loss: 0.0504
Epoch 11/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 17s 63ms/step - loss: 0.0503 - val_loss: 0.0498
Epoch 12/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 24s 87ms/step - loss: 0.0502 - val_loss: 0.0498
Epoch 13/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 46s 104ms/step - loss: 0.0501 - val_loss: 0.0498
Epoch 14/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 16s 57ms/step - loss: 0.0499 - val_loss: 0.0500
Epoch 15/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 17s 60ms/step - loss: 0.0502 - val_loss: 0.0501
Epoch 16/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 22s 65ms/step - loss: 0.0499 - val_loss: 0.0499
Epoch 17/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 15s 54ms/step - loss: 0.0505 - val_loss: 0.0499
Epoch 18/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 19s 69ms/step - loss: 0.0501 - val_loss: 0.0497
Epoch 19/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 23s 85ms/step - loss: 0.0503 - val_loss: 0.0498
Epoch 20/20
273/273 ━━━━━━━━━━━━━━━━━━━━ 27s 99ms/step - loss: 0.0506 - val_loss: 0.0499
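The validation loss plateaus almost immediately (≈0.050 throughout), so most of the 20 epochs add little. One option, sketched here with standard Keras callbacks, is to stop early and keep the best weights:

# Sketch: stop when val_loss stops improving and restore the best weights
early_stop = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=5, restore_best_weights=True
)
history = best_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50, batch_size=32,
    callbacks=[early_stop],
)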
preds = best_model.predict(X_test)      # predictions live in the scaled [0, 1] space
y_pred = sc_y.inverse_transform(preds)  # map back to the original Electricity units
print("Sample Prediction:", y_pred[:10])

273/273 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step
Sample Prediction: [[1193.0508]
[1180.5586]
[1186.2701]
[1191.1058]
[1191.3317]
[1189.4683]
[1191.2549]
[1182.8384]
[1194.8209]
[1197.2668]]
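The ten predictions above all sit in a narrow 1180–1197 band, and the loss curves barely move, so it is worth checking whether the model has collapsed toward a near-constant output. A minimal evaluation sketch, assuming y_test holds the scaled Electricity targets aligned with preds, and using the hourly granularity implied by the 8760-row year of data:

from sklearn.metrics import mean_absolute_error, mean_squared_error

# Bring the targets back to the original units (y_test is the scaled Electricity column)
y_true = sc_y.inverse_transform(np.asarray(y_test).reshape(-1, 1))

print("MAE: ", mean_absolute_error(y_true, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_true, y_pred)))

# Visual check over the first week of the validation span (24 * 7 = 168 hourly steps)
plt.figure(figsize=(10, 4))
plt.plot(y_true[:168], label="Actual")
plt.plot(y_pred[:168], label="Predicted")
plt.xlabel("Hour")
plt.ylabel("Electricity consumption")
plt.legend()
plt.show()

If the predicted curve is nearly flat against a varying actual curve, one common fix is projecting the 3-wide input into a larger d_model with a Dense layer before the encoder blocks, since a model width of 3 gives the attention heads very little capacity to work with.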